############ Replication File: Holding Ground 
############ Baragwanath, Bayi, and Fasolin 
## Figures 1 and 2, S6, S7
## Tables S4, S5, S6 

#### NOTE S2, S3, S4, were made in arcmap

#packages
library(dplyr)
library(tidyr)
library(sf)
library(ggplot2)
library(stargazer)

rm(list = ls())
gc()

#Set working directory here
wd <- ""
############## Deforestation Plots: Figure 1 -----------------------------------------------------------------
# load --------------------------------------------------------------------
df <- readRDS(file.path(wd, "legal_amazon_deforestation_long.rds"))

# prepare data -----------------------------------------------------------
#create one variable identifying grid type
df <- df %>%
  mutate(grid_is = case_when(
    grid_is_it_hom == 1 ~ "IT",
    grid_is_sp_rec == 1 ~ "SP",
    grid_is_su_rec == 1 ~ "SU",
    TRUE ~ "Non-Conserved"
  ))

#identify grids in the arc of deforestation
df$arc <- ifelse(df$SIGLA_UF %in% c("PA", "MT", "RO", "AC"), 1, 0)

#remove Mato Grosso and Goais since they are neither in the arc or outside of the arc of deforestation
df <- df %>%
  filter(!SIGLA_UF %in% c("MS", "GO"))

yearly_def_by_category <- df %>%
  mutate(arc_label = ifelse(arc == 1, "Arc of Deforestation", "Non-Arc of Deforestation")) %>%
  group_by(year, grid_is, arc_label) %>%
  summarise(mean_def_area = mean(deforestation_area, na.rm = TRUE), .groups = "drop") %>%
  mutate(mean_def_pct = (mean_def_area / 4) * 100)


# plot --------------------------------------------------------------------

plot_deforestation <- ggplot(data = subset(yearly_def_by_category, year >= 2002), aes(x = year, y = mean_def_pct, color = grid_is)) +
  geom_line(linewidth = 2) +
  facet_wrap(~arc_label, nrow = 1) +
  
  # Vertical policy lines and labels
  geom_vline(xintercept = 2004, color = 'blue3', linetype = "dashed", alpha = 0.7) +
  geom_text(aes(x = 2004.4, y = 0.9, label = "PPCDAM"), angle = 90, color = 'blue3', alpha = 1.2, size = 12) + #1.18
  geom_vline(xintercept = 2012, color = 'black', linetype = "dashed", alpha = 0.7) +
  geom_text(aes(x = 2012.4, y = 0.93, label = "Forest Code"), angle = 90, color = "black", size = 12) +
  geom_vline(xintercept = 2019, color = 'red3', linetype = "dashed", alpha = 0.7) +
  geom_text(aes(x = 2019.4, y = 0.93, label = "Bolsonaro"), angle = 90, color = 'red3', alpha = 1.2, size = 12) +
  geom_text(aes(x = 2020.6, y = 0.98, label = "Elected"), angle = 90, color = 'red3', alpha = 1.2, size = 12) +
  
  # Colors
  scale_color_manual(
    values = c(
      "IT" = "#b2d8f4",
      "SP" = "#675d97",
      "SU" = "#bba9c3",
      "Non-Conserved" = "#7CA982"
    ),
    breaks = c("IT", "SP", "SU", "Non-Conserved"),
    guide = guide_legend(override.aes = list(linewidth = 6))
  ) +
  
  # Labels and theme
  labs(
    x = "Year",
    y = "Deforestation (%)",
    color = NULL
  ) +
  theme_minimal(base_size = 28) +
  theme(
    legend.position = "bottom",
    legend.text = element_text(size = 30),
    strip.text = element_text(size = 32, face = "bold"),
    axis.text = element_text(size = 30, color = "grey10"),
    axis.title = element_text(size = 34),
    panel.background = element_rect(fill = "white", color = NA),
    plot.background = element_rect(fill = "white", color = NA),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(color = "black")
  ) 

plot_deforestation

ggsave(file.path(wd, "Replication_Holding_Ground/Figures","plot_deforestation.png"), plot_deforestation, width = 18, height = 11)

rm(list = ls())


############## Deforestation Maps: Figure 2 -----------------------------------------------------------------
# Load --------------------------------------------------------------------
#shapefile of protected areas
pas <- readRDS(file.path(wd, "pas.rds"))

#shapefile of Brazilian Legal Amazon (BLA) borders
legalam <- readRDS(file.path(wd, "legal_amazon.rds"))

#shapefile of deforestation throughout the BLA
def_legalam <- readRDS(file.path(wd, "legal_amazon_deforestation_avg_wide.rds"))

#shapefile for Brazilian states
states <- readRDS(file.path(wd, "brazil_states.rds"))
states$arc_states <- ifelse( states$SIGLA_UF %in% c("PA", "MT", "RO", "AC"), 1, 0) #identify arc states

#load shapefile of each zone
zoneA <- readRDS(file.path(wd, "zoneA.rds"))
zoneB <- readRDS(file.path(wd, "zoneB.rds"))
zoneC <- readRDS(file.path(wd, "zoneC.rds"))

#load label shapefile
labels <- readRDS(file.path(wd, "zone_labels.rds"))

# Clip data to each zone --------------------------------------------------
#clip protected areas to each zone
pa_zoneA <- st_crop(pas, st_bbox(zoneA))
pa_zoneB <- st_crop(pas, st_bbox(zoneB))
pa_zoneC <- st_crop(pas, st_bbox(zoneC))

#clip deforestation data to each zone
def_zoneA <- st_crop(def_legalam, st_bbox(zoneA))
def_zoneB <- st_crop(def_legalam, st_bbox(zoneB))
def_zoneC <- st_crop(def_legalam, st_bbox(zoneC))

#clip legal amazon boundaries to each zone
legalam_zoneA <- st_crop(legalam, st_bbox(zoneA))
legalam_zoneB <- st_crop(legalam, st_bbox(zoneB))
legalam_zoneC <- st_crop(legalam, st_bbox(zoneC))

# Assign label to each zone
zoneA$label <- "A"
zoneB$label <- "B"
zoneC$label <- "C"

#combine zones into one sf object
zones_all <- rbind(zoneA, zoneB, zoneC)

# Mapping Panel 1 ---------------------------------------------------------------------
map1 <- ggplot() +
  geom_sf(data = subset(states, arc_states == 1), color = NA, fill = "grey95") +
  geom_sf(data = pas, aes(fill = pa_type), color = NA) +
  scale_fill_manual(values = c("IT" = "#b2d8f4", "SP" = "#675d97", "SU" = "#bba9c3"),
                    name = "Protected Area Type") +
  geom_sf(data = legalam, fill = NA, color = "black", size = 0.3) +
  geom_sf(data = zones_all, fill = NA, color = "black", linewidth = 3) +
  geom_sf_text(data = labels, aes(label = label), size = 12,
               fontface = "bold", color = "black") +
  theme_void(base_size = 20) +
  theme(legend.position = "bottom")

ggsave(
  file.path(wd, "Replication_Holding_Ground/Figures", "map_pas_legalam.png"),
  plot = map1,
  width = 12, height = 10
)

# Mapping Zones A, B, and C -----------------------------------------------
#Define spatial zones to iterate over
zones <- list(
  "zoneA" = zoneA,
  "zoneB" = zoneB,
  "zoneC" = zoneC
)

#Define three periods
periods <- list(
  "2004-2011" = list(var = "def_pct_avg_2004_2011", end_year = 2011),
  "2012–2018" = list(var = "def_pct_avg_2012_2018", end_year = 2018),
  "2019–2022" = list(var = "def_pct_avg_2019_2022", end_year = 2022)
)

#Define classification thresholds for deforestations
bin_def <- function(x) {
  ifelse(x <= 0, 1,
         ifelse(x > 0 & x <= 0.5, 2,
                ifelse(x > 1 & x <= 2, 3,
                       ifelse(x > 2 & x <= 5, 4,
                              ifelse(x > 5 & x < 10, 5,
                                     ifelse(x >= 10, 6, 0))))))
}


#Main loop: generate maps by zone
for (zone_name in names(zones)) {
  
  # Use already cropped data
  def_zone <- get(paste0("def_", zone_name))
  pa_zone <- get(paste0("pa_", zone_name))
  legalam_zone <- get(paste0("legalam_", zone_name))
  
  # Store plots for each period
  period_plots <- list()
  
  for (p in names(periods)) {
    def_var <- periods[[p]]$var
    year_cutoff <- periods[[p]]$end_year
    
    # Prepare deforestation data
    def_period <- def_zone %>%
      mutate(def_val = !!sym(def_var),
             def_bin = bin_def(def_val)) %>%
      filter(def_bin != 1 & !is.na(def_bin)) %>%
      mutate(def_bin = factor(def_bin,
                              levels = c(2, 3, 4, 5, 6),
                              labels = c("< 1%", "< 2%", "< 5%", "< 10%", "≥ 10%")))
    
    # Filter CU data up to the end of the period
    pa_period <- pa_zone %>% filter(pa_year <= year_cutoff)
    
    # Create the map for this period
    p_map <- ggplot() +
      geom_sf(data = pa_period, aes(fill = pa_type), color = NA) +
      scale_fill_manual(values = c("IT" = "#b2d8f4", "SP" = "#675d97", "SU" = "#bba9c3"), guide = "none") +
      ggnewscale::new_scale_fill() +
      geom_sf(data = def_period, aes(fill = def_bin), color = NA) +
      scale_fill_manual(values = c("lightsalmon", "red1", "red3", "darkred", "#5C0000"), guide = "none") +
      geom_sf(data = legalam_zone, fill = NA, color = "black", size = 0.3) +
      ggtitle(paste(p)) +
      theme_void(base_size = 30) +
      theme(plot.title = element_text(size = 38, hjust = 0.5))
    
    # Add to the list
    period_plots[[p]] <- p_map
  }
  
  # Combine all period maps into a 2x2 grid
  combined_plot <- cowplot::plot_grid(plotlist = period_plots, ncol = 2)
  
  # Save combined plot
  ggsave(
    filename = file.path(wd, "Replication_Holding_Ground/Figures", paste0("map_pas_", zone_name, ".png")),
    plot = combined_plot,
    width = 16, height = 12
  )
}

rm(list = ls())
############## Budget: Figures S6 and S7 -----------------------------------------------------------------
# Load --------------------------------------------------------------------
budget<- readRDS(file.path(wd, "budget.rds"))

# Plot Proposed -----------------------------------------------------------
plot_proposed <- ggplot(dplyr::filter(budget, Budget_Type == "Proposed"),
                     aes(x = Year, y = Percentage)) +
  geom_line(color = "darkgreen", linewidth = 1.2) +
  geom_vline(xintercept = 2004, color = 'blue3', linetype = "dashed", alpha = 0.5) +
  annotate("text", x = 2004.5, y = 0.225, label = "PPCDAM",
           angle = 90, color = 'blue3', alpha = 0.7, size = 12) +
  geom_vline(xintercept = 2012, color = 'black', linetype = "dashed") +
  annotate("text", x = 2012.5, y = 0.235, label = "Forest",
           angle = 90, color = "black", size = 12) +
  annotate("text", x = 2013.7, y = 0.24, label = "Code",
           angle = 90, color = "black", size = 12) +
  geom_vline(xintercept = 2019, color = 'red3', linetype = "dashed", alpha = 0.5) +
  annotate("text", x = 2019.5, y = 0.225, label = "Bolsonaro",
           angle = 90, color = 'red3', alpha = 0.7, size = 12) +
  annotate("text", x = 2020.7, y = 0.225, label = "Elected",
           angle = 90, color = 'red3', alpha = 0.7, size = 12) +
  labs(x = "Year", y = "Percentage of Federal Budget (%)") +
  scale_y_continuous(limits = c(0, 0.247),
                     labels = scales::percent_format(scale = 1)) +
  theme_minimal(base_size = 35) +
  theme(panel.background = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())

ggsave(file.path(wd, "Replication_Holding_Ground/Figures", "budget_proposed.png"),
       plot_proposed, width = 18, height = 11)

# Plot Executed -----------------------------------------------------------
plot_executed <- ggplot(dplyr::filter(budget_long, Budget_Type == "Executed"),
                     aes(x = Year, y = Percentage)) +
  geom_line(color = "darkgreen", linewidth = 1.2) +
  geom_vline(xintercept = 2004, color = 'blue3', linetype = "dashed", alpha = 0.5) +
  annotate("text", x = 2004.5, y = 0.225, label = "PPCDAM",
           angle = 90, color = 'blue3', alpha = 0.7, size = 12) +
  geom_vline(xintercept = 2012, color = 'black', linetype = "dashed") +
  annotate("text", x = 2012.5, y = 0.235, label = "Forest",
           angle = 90, color = "black", size = 12) +
  annotate("text", x = 2013.7, y = 0.24, label = "Code",
           angle = 90, color = "black", size = 12) +
  geom_vline(xintercept = 2019, color = 'red3', linetype = "dashed", alpha = 0.5) +
  annotate("text", x = 2019.5, y = 0.225, label = "Bolsonaro",
           angle = 90, color = 'red3', alpha = 0.7, size = 12) +
  annotate("text", x = 2020.7, y = 0.225, label = "Elected",
           angle = 90, color = 'red3', alpha = 0.7, size = 12) +
  labs(x = "Year", y = "Percentage of Federal Budget (%)") +
  scale_y_continuous(limits = c(0, 0.247),
                     labels = scales::percent_format(scale = 1)) +
  theme_minimal(base_size = 35) +
  theme(panel.background = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())

ggsave(file.path(wd, "Replication_Holding_Ground/Figures", "budget_executed.png"),
       plot_executed, width = 18, height = 11)


############## Descriptive Statistics: Tables S4, S5, S6 -----------------------------------------------------------------
# Load and prepare  ------------------------------------------------------------
#load main dataset
df <- readRDS(file.path(wd, "df_pas.rds"))

#subset to analysis time frame (2004-2022)
df <- subset(df, year >= 2004 & year <= 2022) 

#Clean “near” variables to ensure they only apply after PA creation
# If the PA was designated AFTER the observation year, set distance to NA
# (i.e., a PA can't influence a year before it existed)
df$near_pa_rec <- ifelse(df$near_pa_ano >= df$year, df$near_pa, NA)
df$near_it_hom <- ifelse(df$near_it_hom_year >= df$year, df$near_it, NA)

# Subset to observations within a 12km radius of PAs/ITs
# Create two temporary datasets:
# df1: points within 12km of PAs
# df2: points within 12km of Indigenous Territories
# Then merge them back together
df1 <- df %>%
  filter(near_pa_rec <= 12000)
df2 <- df %>%
  filter(near_it_hom <= 12000)
df <- rbind(df1, df2)

# Define treatment for Indigenous Territories (ITs)
# t_it = 1 if:
#   - point is inside an IT (grid_is_it == 1)
#   - AND the year is >= IT designation year
# Otherwise 0, or NA if missing
df$t_it <- ifelse(df$grid_is_it == 1 & df$year >= df$near_it_hom_year, 1, 0)
df$t_it <- ifelse(is.na(df$t_it), NA, df$t_it)
table(df$t_it, useNA = "always")

# Create inside/outside distance variable for ITs
# Positive distance = inside
# Negative distance = outside
df$near_it <- ifelse(df$grid_is_it == 0, df$near_it*(-1), df$near_it)
summary(df$near_it)


# Keep only the last 4 digits of near_pa_ano (extract year)
# This handles cases where near_pa_ano contains full timestamps
df <- df %>%
  mutate(near_pa_ano = substr(near_pa_ano, nchar(near_pa_ano) - 3, 
                              nchar(near_pa_ano))) 

# Treatment for Strictly Protected Areas (SP)
# t_sp = 1 if point is inside an SP AND year >= PA creation year
df$t_sp <- ifelse(df$grid_is_sp == 1 & df$year >= df$near_pa_ano, 1, 0)
df$t_sp <- ifelse(is.na(df$t_sp), NA, df$t_sp)
table(df$t_sp, useNA = "always")

# Treatment for Sustainable Use PAs (SU)
# t_su = 1 if inside SU PA AND year >= PA creation year
df$t_su <- ifelse(df$grid_is_su == 1 & df$year >= df$near_pa_ano, 1, 0)
df$t_su <- ifelse(is.na(df$t_su), NA, df$t_su)
table(df$t_su, useNA = "always")

## Inside - Outside
df$near_sp <- ifelse(df$near_pa_type == "Proteção Integral", df$near_pa, 
                     as.numeric(NA))

# Create inside/outside distance variables separately for:
#   - Strict Protection PAs (near_sp)
#   - Sustainable Use PAs (near_su)
#
# near_sp / near_su defined only for PAs of the correct type;
# all other types set to NA.
#
# Again: positive = inside, negative = outside.
df$near_sp <- ifelse(df$grid_is_sp == 0, df$near_sp*(-1), df$near_sp)
summary(df$near_sp)

df$near_su <- ifelse(df$near_pa_type == "Uso Sustentável", df$near_pa, 
                     as.numeric(NA))

df$near_su <- ifelse(df$grid_is_su == 0, df$near_su*(-1), df$near_su)
summary(df$near_su)

# Restrict analysis to Federal protected areas only
# (“near_pa_gov” identifies the level of government)
df <- df %>%
  filter(near_pa_gov == "Federal")

# Indigenous Territories  ----------------------------------------------------------
#subset to gridcells around ITs
df_it <- df %>%
  filter(abs(near_it) <= 12000)

#get summary of key variables
selected_it <- df_it %>%
  select(deforestation_area, deforestation_pct, 
         t_it,
         near_it, 
         near_roads_05, near_rivers, near_cities,
         elevation, hillshade, slope) %>%
  mutate(across(starts_with("near_"), ~ . / 1000))
selected_it <- as.data.frame(selected_it)

#Get latex table
stargazer(selected_it, 
          type =  "latex",
          digits = 2,
          title = "Indigenous Territories",
          covariate.labels = c("Deforestation (km2)","Deforestation (%)",
                               "IT Dummy",
                               "Distance to IT (km)",
                               "Distance to Roads (2005, km)",
                               "Distance to Rivers (km)",
                               "Distance to Cities (km)",
                               "Elevation (m)",
                               "Hillshade",
                               "Slope"))



# Strictly Protected Areas ------------------------------------------------------------
#subset to gridcells around SPs
df_sp <- df %>%
  filter(abs(near_sp) <= 12000)

#get summary of key variables
selected_sp <- df_sp %>%
  select(deforestation_area, deforestation_pct, 
         t_sp, 
         near_sp,
         near_roads_05, near_rivers, near_cities,
         elevation, hillshade, slope) %>%
  mutate(across(starts_with("near_"), ~ . / 1000))
selected_sp <- as.data.frame(selected_sp)

#Get latex table
stargazer(selected_sp, 
          type =  "latex",
          digits = 2,
          title = "Strictly Protected",
          covariate.labels = c("Deforestation (km2)","Deforestation (%)",
                               "SP Dummy",
                               "Distance to SP (km)",
                               "Distance to Roads (2005, km)",
                               "Distance to Rivers (km)",
                               "Distance to Cities (km)",
                               "Elevation (m)",
                               "Hillshade",
                               "Slope"))

# Sustainable Use Areas ------------------------------------------------------------
#subset to gridcells around SUs
df_su <- df %>%
  filter(abs(near_su) <= 12000)

#get summary of key variables
selected_su <- df_su %>%
  select(deforestation_area, deforestation_pct, 
         t_su, 
         near_su, 
         near_roads_05, near_rivers, near_cities,
         elevation, hillshade, slope) %>%
  mutate(across(starts_with("near_"), ~ . / 1000))
selected_su <- as.data.frame(selected_su)

#Get latex table
stargazer(selected_su, 
          type =  "latex",
          digits = 2,
          title = "Sutainable Use",
          covariate.labels = c("Deforestation (km2)","Deforestation (%)",
                               "SU Dummy", 
                               "Distance to SU (km)",
                               "Distance to Roads (2005, km)",
                               "Distance to Rivers (km)",
                               "Distance to Cities (km)",
                               "Elevation (m)",
                               "Hillshade",
                               "Slope"))


rm(list = ls())


############## Treatment Distribution: Figure S8 -----------------------------------------------------------------
# Load --------------------------------------------------------------------
#shapefile of protected areas, remove geometry
pas <- readRDS(file.path(wd, "pas.rds"))
pas <- st_drop_geometry(pas)

# Plot -------------------------------------------------------------------
#get cumulative count of PAs over time
pas_clean <- pas %>%
  mutate(pa_year = as.numeric(pa_year)) %>%
  group_by(pa_year, pa_type) %>%
  summarise(n = n(), .groups = "drop") %>%
  arrange(pa_type, pa_year) %>%
  group_by(pa_type) %>%
  mutate(cumulative = cumsum(n)) %>%
  ungroup()

plot_pas <- ggplot(pas_clean, aes(x = pa_year, y = cumulative)) +
  geom_line(size = 1, color = "black") +
  facet_wrap(~ pa_type, ncol = 1, scales = "free_y") +
  scale_x_continuous(
    breaks = seq(min(pas_clean$pa_year, na.rm = TRUE),
                 max(pas_clean$pa_year, na.rm = TRUE),
                 by = 10)
  ) +
  labs(
    x = "Year of Recognition",
    y = "Cumulative Number of PAs"
  ) +
  theme_minimal(base_size = 35) +
  theme(
    strip.text = element_text( face = "bold"),
    #panel.background = element_blank(),
   #panel.grid.major = element_blank(),
    #panel.grid.minor = element_blank())
  )


plot_pas

ggsave(file.path(wd, "Replication_Holding_Ground/Figures", "pas_designated.png"),
       plot_pas, width = 15, height = 13)


